#-------------------------------------------------------------------------------
#
# Electoral predictors - descriptive number of polls and time window
#
# Author: Sina Chen
#
#-------------------------------------------------------------------------------


# Libraries ---------------------------------------------------------------

{
  library(ggplot2)
  library(dplyr)
  library(cowplot)
  # library(grid)
  # library(gridExtra)
  # library(stringi)
  # library(ggrepel)
  # library(usdata)
}


# Data --------------------------------------------------------------------

# Poll-level data; the code below reads the columns dte, election_date,
# end_date, cycle and state from it.
polls <- readRDS("data/us_senate_polls1990_2022_final.RDS")



# Preparation -------------------------------------------------------------

# remove polls conducted before the previous election (keeps dte < 1461;
# dte is presumably "days to election" -- TODO confirm against the data) and
# add wte, the number of completed weeks between poll end and election day
polls <- polls %>%
  filter(dte < 1461) %>%
  mutate(
    # Parse as calendar dates instead of local date-times: the difference is
    # then a whole number of days, so the floored week count cannot shift
    # with the session time zone or daylight saving time. Stored as a plain
    # number so it can be grouped and plotted on a continuous scale.
    wte = floor(as.numeric(difftime(
      as.Date(election_date, format = "%m/%d/%y"),
      as.Date(end_date, format = "%m/%d/%y"),
      units = "weeks"
    )))
  )

# weekly data: number of polls per cycle and week to election
polls_week <- polls %>%
  group_by(cycle, wte) %>%
  summarise(n_poll = n(), .groups = "drop")

# subset polls up to 100 days
polls100 <- polls %>%
  filter(dte < 101)

# Summarise polls by election (state x cycle): the largest dte observed and
# the number of polls. `window_label` is stored in the `timw_window` column,
# which the plots use to tell the two poll subsets apart.
# group_by() returns rows sorted by the grouping keys; that order is kept
# because geom_path() later connects points in row order.
summarise_by_election <- function(data, window_label) {
  data %>%
    group_by(state, cycle) %>%
    summarise(dte_election_max = max(dte, na.rm = TRUE),
              n_poll = n(),
              .groups = "drop") %>%
    mutate(timw_window = window_label)
}

# Average the election-level summaries within each cycle: mean of the
# per-election maximum dte and mean number of polls per election.
summarise_by_cycle <- function(data, window_label) {
  data %>%
    group_by(cycle) %>%
    summarise(dte_cycle_avg = mean(dte_election_max),
              n_poll_cycle_avg = mean(n_poll, na.rm = TRUE),
              .groups = "drop") %>%
    mutate(timw_window = window_label)
}

# summarise by election
election_data <- summarise_by_election(polls, "All polls")
election_data100 <- summarise_by_election(polls100, "100 days")

# combine
election_data_combined <- bind_rows(election_data, election_data100)

# summarise by cycle
cycle_data <- summarise_by_cycle(election_data, "All polls")
cycle_data100 <- summarise_by_cycle(election_data100, "100 days")

# combine
cycle_data_combined <- bind_rows(cycle_data, cycle_data100)


# Plots -------------------------------------------------------------------

# number of polls per week: one line per election cycle (facets, 3 columns),
# x axis reversed so the weeks count down towards the election
n_poll_week_plot <- ggplot(polls_week, aes(x = wte, y = n_poll)) +
  geom_line() +
  facet_wrap(~ cycle, ncol = 3) +
  scale_x_reverse() +
  labs(x = "Weeks to election", y = "No. polls") +
  theme_bw() +
  theme(
    text = element_text(size = 16),
    axis.text = element_text(size = 14),
    axis.text.x = element_text(angle = 45, margin = margin(t = 6)),
    strip.text = element_text(size = 16)
  )

# save as EPS through the cairo PostScript device
ggsave(
  filename = "code/results_vis/plots/fgA1.eps",
  plot = n_poll_week_plot,
  device = cairo_ps,
  width = 14,
  height = 10,
  dpi = 1200,
  bg = "#ffffff"
)

# theme settings shared by both panels of the combined figure
panel_theme <- theme_bw() +
  theme(text = element_text(size = 16),
        axis.text = element_text(size = 14),
        plot.margin = grid::unit(c(2, 10, 2, 12), "mm"))

# Seeded jitter so the saved figure is identical on every run. Only the
# width is set; the vertical jitter keeps the ggplot2 default, as before.
point_jitter <- position_jitter(width = 0.25, seed = 1)

# max. time window plot: one point per election, line = cycle average.
# Every layer maps cycle through as.factor() and the dte values through
# as.numeric(), so all layers share the same discrete x / continuous y scale
# regardless of how the columns are stored.
avg_dte_plot <- ggplot() +
  geom_jitter(data = election_data,
              aes(x = as.factor(cycle), y = as.numeric(dte_election_max)),
              alpha = 0.6, position = point_jitter) +
  geom_path(data = cycle_data,
            aes(x = as.factor(cycle), y = as.numeric(dte_cycle_avg),
                group = 1)) +
  panel_theme +
  # cycle labels are shown only on the lower panel
  theme(axis.text.x = element_blank()) +
  labs(x = "", y = "Max. time window (days)")

# no. polls plot: one point per election and poll subset, lines = cycle
# averages, coloured by poll subset
n_polls_plot <- ggplot() +
  geom_jitter(data = election_data_combined,
              aes(x = as.factor(cycle), y = n_poll, colour = timw_window),
              alpha = 0.6, position = point_jitter) +
  geom_line(data = cycle_data_combined,
            aes(x = as.factor(cycle), y = n_poll_cycle_avg,
                group = timw_window, colour = timw_window)) +
  panel_theme +
  theme(legend.position = "bottom",
        legend.text = element_text(size = 16)) +
  labs(x = "", y = "No. polls", colour = "") +
  # named values pin each colour to its subset instead of relying on the
  # alphabetical order of the labels
  scale_color_manual(values = c("100 days" = "darkgrey",
                                "All polls" = "black"))

# combine plots: stacked vertically, aligned on their axes, labelled A and B
avg_dte_n_poll_plot <- plot_grid(avg_dte_plot, n_polls_plot,
                                 ncol = 1, align = "v",
                                 rel_heights = c(1, 1.2),
                                 labels = "AUTO",
                                 label_size = 16, label_x = 0.97,
                                 axis = "tblr")

# save
ggsave(filename = "code/results_vis/plots/fgA2.eps",
       plot = avg_dte_n_poll_plot,
       width = 14, height = 8, bg = "#ffffff", device = cairo_ps, dpi = 1200)
